library(ggpubr)
library(gprofiler2)
library(tidyverse)
library(readxl)Hackathon19_visualise_ontology project directory. Provided you have opened Hackathon19_visualise_ontology as a project this should already be the case.getwd()## [1] "/home/rreynolds/misc_projects/Hackathon19_visualise_ontology"
# import PD GWAS table
PD_res <-
readxl::read_excel(path = "./data/TableS2_Detailed_summary_statistics_nominated_risk_variants.xlsx")
# Creating a named list
geneLists <-
PD_res %>%
# Select two columns we wish to turn into lists
dplyr::select(`Nearest Gene`, `QTL Nominated Gene (nearest QTL)`) %>%
# Coerce columns into list format
as.list() %>%
# Across each list do the following...
lapply(., function(x){
x %>%
# Remove NAs
na.omit() %>%
# Convert to character vector
as.character() %>%
# Keep only unique genes
unique()
}) %>%
# Name the lists
setNames(., c("nearest_gene", "QTL_nom_gene"))
# running gprofiler
gprofilerOutput <- gprofiler2::gost(query = geneLists, # if lists are named then gost can use this an input
organism = "hsapiens", # set the organism
correction_method = c("gSCS"), # select a correction method (gSCS reccomended in gost documentation)
domain_scope = c("annotated"), # select whether to restrict the background set
sources = c("GO"), # databases would you like to search for pathways & processes (others: "KEGG","REAC","WP")
significant = TRUE, # when TRUE displays only significant results (p<0.05)
evcodes = TRUE, # when TRUE displays the IDs of the genes calling each pathway
ordered_query = FALSE) # when TRUE the input list is ordered in some meaningful waydomain_scope argument) is an important choice, as it should represent the population of genes your list was sampled from. E.g. If you were to perform a differential gene expression experiment in brain samples from control and PD, your background list would be all the expressed genes you tested.
gprofiler2::gost provides 3 different options: “annotated”, “known”, “custom”. For more details see: https://biit.cs.ut.ee/gprofiler/page/docs#statistical_domain_scopegprofilerOutput$result %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')query, and within this we find the names of our inputted lists i.e. nearest_gene, QTL_nom_genegprofilerOutput$result %>%
# Filter for duplicated terms
dplyr::filter(duplicated(term_id) | duplicated(term_id, fromLast = TRUE)) %>%
# Rearrange column order to have term_id and term_name at beginning
dplyr::select(term_id,term_name, everything()) %>%
# Arrange rows by term_id
dplyr::arrange(term_id) %>%
DT::datatable(rownames = FALSE,
options = list(scrollX = TRUE),
class = 'white-space: nowrap')source("./R/gen_gprofiler_barplot.R")
plot_list <-
gprofilerOutput$result %>%
# Group by query
dplyr::group_by(query) %>%
# Split dataframe by query groups i.e. nearest_gene and QTL_nom_gene
dplyr::group_split() %>%
# For each dataframe plot a barplot
lapply(., function(x){
gen_gprofiler_barplot(DF = x,
num_categories = 15,
plot_title = "")
})
# ggarrange allows us to arrange multiple ggplots on the same page
ggpubr::ggarrange(plotlist = plot_list,
labels = c("nearest_gene", "QTL_nom_gene"),
nrow = 2) gprofiler2::gostplot(gprofilerOutput) # Save results with a column denoting an x-value between the range 1-2
results <- gprofilerOutput$result %>%
dplyr::mutate(x = 1.5)
# Need to create a duplicate table wherein a range (xmin, xmax) is created for each term
results_dup <- results %>%
dplyr::select(-x) %>%
dplyr::inner_join(results %>%
dplyr::distinct(term_id) %>%
dplyr::mutate(xmin = 1,
xmax = 2) %>%
tidyr::gather(key = x_value, value = x, -term_id) %>%
dplyr::select(-x_value))
# Initialising first circle with points
library(circlize)
circos.par("track.height" = 0.1)
circos.initialize(factors = results_dup$term_id, x = results_dup$x)
circos.track(factors = results$term_id, x = results$x, y = results$precision,
panel.fun = function(x, y) {
circos.points(x,y)
})# Adding text to circle
library(circlize)
circos.par("track.height" = 0.1)
circos.initialize(factors = results_dup$term_id, x = results_dup$x)
circos.track(factors = results$term_id, x = results$x, y = results$precision,
panel.fun = function(x, y) {
circos.points(x,y)
labels = get.cell.meta.data("sector.index")
xlim = get.cell.meta.data("xlim")
ylim = get.cell.meta.data("ylim")
circos.text(x = mean(xlim), y = 0.9,
labels = labels,
cex = 0.5,
facing = "clockwise",
niceFacing = TRUE)
})# Moving text in circle
library(circlize)
circos.par("track.height" = 0.1)
circos.initialize(factors = results_dup$term_id, x = results_dup$x)
circos.track(factors = results$term_id, x = results$x, y = results$precision,
panel.fun = function(x, y) {
circos.points(x,y)
labels = get.cell.meta.data("sector.index")
xlim = get.cell.meta.data("xlim")
ylim = get.cell.meta.data("ylim")
circos.text(x = mean(xlim), y = 2,
labels = labels,
cex = 0.75,
facing = "clockwise",
niceFacing = TRUE)
})chordDiagram() function as opposed to building our own tracks from scratchget_intersections <- function(gprofiler_results, term_id_or_name = c("id", "name")){
if(term_id_or_name == "id"){
intersect_df <- data.frame(term_id_1 = gprofiler_results$term_id %>% unique() %>% as.character(),
term_id_2 = gprofiler_results$term_id %>% unique() %>% as.character()) %>%
tidyr::expand(term_id_1, term_id_2) %>%
dplyr::mutate(term_id_1 = term_id_1 %>% as.character(),
term_id_2 = term_id_2 %>% as.character()) %>%
dplyr::filter(term_id_1 < term_id_2) %>%
dplyr::inner_join(gprofiler_results %>%
dplyr::select(term_id, intersection), by = c("term_id_1" = "term_id")) %>%
dplyr::inner_join(gprofiler_results %>%
dplyr::select(term_id, intersection), by = c("term_id_2" = "term_id")) %>%
dplyr::mutate(intersection.x = intersection.x %>%
str_split(., ","),
intersection.y = intersection.y %>%
str_split(., ","),
intersection_of_x_and_y = Map(intersect, intersection.x, intersection.y),
number_of_intersections = Map(length, intersection_of_x_and_y) %>%
unlist()) %>%
dplyr::distinct(term_id_1, term_id_2, .keep_all = TRUE) %>%
dplyr::mutate(from = term_id_1,
to = term_id_2,
value = as.numeric(number_of_intersections)) %>%
dplyr::select(from,to,value)
}
if(term_id_or_name == "name"){
intersect_df <- data.frame(term_name_1 = gprofiler_results$term_name %>% unique() %>% as.character(),
term_name_2 = gprofiler_results$term_name %>% unique() %>% as.character()) %>%
tidyr::expand(term_name_1, term_name_2) %>%
dplyr::mutate(term_name_1 = term_name_1 %>% as.character(),
term_name_2 = term_name_2 %>% as.character()) %>%
dplyr::filter(term_name_1 < term_name_2) %>%
dplyr::inner_join(gprofiler_results %>%
dplyr::select(term_name, intersection), by = c("term_name_1" = "term_name")) %>%
dplyr::inner_join(gprofiler_results %>%
dplyr::select(term_name, intersection), by = c("term_name_2" = "term_name")) %>%
dplyr::mutate(intersection.x = intersection.x %>%
str_split(., ","),
intersection.y = intersection.y %>%
str_split(., ","),
intersection_of_x_and_y = Map(intersect, intersection.x, intersection.y),
number_of_intersections = Map(length, intersection_of_x_and_y) %>%
unlist()) %>%
dplyr::distinct(term_name_1, term_name_2, .keep_all = TRUE) %>%
dplyr::mutate(from = term_name_1,
to = term_name_2,
value = as.numeric(number_of_intersections)) %>%
dplyr::select(from,to,value)
}
return(as.data.frame(intersect_df))
}
chord_diagram_function <- function(chord_results){
mat <- chord_results %>%
dplyr::mutate(from = str_wrap(from, width = 20),
to = str_wrap(to, width = 20))
set.seed(1234)
grid.col <- setNames(rainbow(length(unlist(dimnames(mat)))), union(rownames(mat), colnames(mat)))
par(mar = c(0, 0, 0, 0), mfrow = c(1, 1))
# now, the image with rotated labels
chordDiagram(mat, annotationTrack = "grid", preAllocateTracks = 1, grid.col = grid.col)
circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
xlim = get.cell.meta.data("xlim")
ylim = get.cell.meta.data("ylim")
sector.name = get.cell.meta.data("sector.index")
circos.text(mean(xlim), ylim[1] + .1, cex = 0.75, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
circos.axis(h = "top", labels.cex = 0.5, major.tick.percentage = 0.2, sector.index = sector.name, track.index = 2)
}, bg.border = NA)
}chord_results <- get_intersections(gprofiler_results = gprofilerOutput$result %>%
dplyr::filter(query == "QTL_nom_gene",
term_size > 20,
term_size < 2000),
term_id_or_name = "name")
chord_diagram_function(chord_results = chord_results %>%
dplyr::filter(value > 10))sessionInfo()## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] circlize_0.4.8 plyr_1.8.4 data.table_1.12.8
## [4] gridExtra_2.3 scales_1.0.0 readxl_1.3.1
## [7] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
## [10] purrr_0.3.3 readr_1.3.1 tidyr_1.0.0
## [13] tibble_2.1.3 tidyverse_1.2.1 gprofiler2_0.1.7
## [16] ggpubr_0.2.4 magrittr_1.5 ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.3 lubridate_1.7.4 lattice_0.20-38
## [4] assertthat_0.2.1 zeallot_0.1.0 digest_0.6.23
## [7] mime_0.7 R6_2.4.1 cellranger_1.1.0
## [10] backports_1.1.5 evaluate_0.14 httr_1.4.1
## [13] pillar_1.4.2 GlobalOptions_0.1.1 rlang_0.4.2
## [16] lazyeval_0.2.2 rstudioapi_0.10 DT_0.9
## [19] rmarkdown_1.18 labeling_0.3 htmlwidgets_1.5.1
## [22] RCurl_1.95-4.12 munsell_0.5.0 shiny_1.3.2
## [25] broom_0.5.2 compiler_3.6.1 httpuv_1.5.2
## [28] modelr_0.1.5 xfun_0.11 pkgconfig_2.0.3
## [31] shape_1.4.4 htmltools_0.3.6 tidyselect_0.2.5
## [34] fansi_0.4.0 viridisLite_0.3.0 crayon_1.3.4
## [37] withr_2.1.2 later_0.8.0 bitops_1.0-6
## [40] grid_3.6.1 xtable_1.8-4 nlme_3.1-141
## [43] jsonlite_1.6 gtable_0.3.0 lifecycle_0.1.0
## [46] cli_2.0.0 stringi_1.4.3 ggsignif_0.6.0
## [49] promises_1.0.1 xml2_1.2.2 ellipsis_0.3.0
## [52] generics_0.0.2 vctrs_0.2.0 cowplot_1.0.0
## [55] tools_3.6.1 glue_1.3.1 hms_0.5.1
## [58] crosstalk_1.0.0 yaml_2.2.0 colorspace_1.4-1
## [61] rvest_0.3.4 plotly_4.9.1 knitr_1.25
## [64] haven_2.1.1